from tqdm.auto import tqdm
import re
from llm_generation import llm_generation

def extract_example_from_string(txt: str) -> str:
    """Return the first fenced TypeScript snippet in ``txt``, comments removed.

    Only the first block opened with a ```TypeScript fence is considered.
    If the snippet contains ``function main`` everything before it is
    dropped; otherwise the whole snippet is kept.

    Args:
        txt: Text that may contain a fenced TypeScript code block.

    Returns:
        The snippet with ``//`` and ``/* */`` comments stripped, or ``''``
        when ``txt`` has no TypeScript fence.
    """
    match = re.search(r'```TypeScript(.*?)```', txt, re.DOTALL)
    if not match:
        return ''
    extracted_code = match.group(1).strip()

    # Office Scripts: keep the code from the `main` entry point onwards.
    # str.find returns -1 when the marker is absent; slicing with -1 would
    # silently reduce the snippet to its last character, so guard for it.
    main_start = extracted_code.find("function main")
    if main_start != -1:
        extracted_code = extracted_code[main_start:]

    # Purely textual comment removal: a `//` inside a string literal (e.g. a
    # URL) is removed too, and a trailing `//` comment that is not followed
    # by a newline is left in place.
    return re.sub(r"//.*?\n|/\*.*?\*/", "", extracted_code, flags=re.DOTALL)

def extract_examples_from_doc(doc_entity) -> list:
    """Collect every code example found in a nested documentation structure.

    Dicts (values only) and lists are traversed recursively; each string is
    passed to ``extract_example_from_string`` and contributes its example
    when that is non-empty. Any other value contributes nothing.

    Args:
        doc_entity: A dict, list, string, or any other value.

    Returns:
        The extracted examples in traversal order; duplicates are kept.
    """
    if isinstance(doc_entity, dict):
        children = doc_entity.values()
    elif isinstance(doc_entity, list):
        children = doc_entity
    elif isinstance(doc_entity, str):
        snippet = extract_example_from_string(doc_entity)
        return [snippet] if snippet else []
    else:
        # Numbers, None and other leaf types carry no examples.
        return []

    collected = []
    for child in children:
        collected.extend(extract_examples_from_doc(child))
    return collected

def get_query_for_example(example: str, exm2nl_prompt) -> str:
    """Generate a natural-language query for a code example via the LLM.

    The ``<code_example>`` placeholder in ``exm2nl_prompt`` is replaced with
    ``example`` and the resulting prompt is sent to ``llm_generation``.

    Args:
        example: Source code of the example.
        exm2nl_prompt: Prompt template containing ``<code_example>``.

    Returns:
        The generated text, cut at the first ``<|im_end|>`` marker if one is
        present, with surrounding whitespace stripped. ``None`` is returned
        when ``llm_generation`` returns ``None``.
    """
    response = llm_generation(exm2nl_prompt.replace('<code_example>', example))
    if response is None:
        return None
    # partition yields the whole string as the head when the marker is absent.
    head, _, _ = response.partition("<|im_end|>")
    return head.strip()

def get_example_corpus(doc_entity, exm2nl_prompt):
    """Build a corpus of (query, program) pairs from a documentation structure.

    Examples are extracted with ``extract_examples_from_doc``, de-duplicated,
    and each one is paired with the query produced by
    ``get_query_for_example``.

    Args:
        doc_entity: Nested documentation structure to scan for examples.
        exm2nl_prompt: Prompt template forwarded to ``get_query_for_example``.

    Returns:
        A list of ``{'query': ..., 'program': ...}`` dicts, one per unique
        example, in order of first appearance. ``'query'`` holds whatever
        ``get_query_for_example`` returned for that example, including
        ``None``.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # corpus order is reproducible across runs; going through set() would
    # make it depend on string hash randomization.
    unique_examples = list(dict.fromkeys(extract_examples_from_doc(doc_entity)))
    example_corpus = []
    for exm in tqdm(unique_examples):
        exm_query = get_query_for_example(exm, exm2nl_prompt)
        example_corpus.append({'query': exm_query, 'program': exm})
    return example_corpus

def get_uids_having_no_example(pg_dict):
    """List the uids whose entry yields no code example.

    Args:
        pg_dict: Mapping from uid to a documentation structure accepted by
            ``extract_examples_from_doc``.

    Returns:
        The uids, in the mapping's iteration order, for which
        ``extract_examples_from_doc`` returns an empty list.
    """
    return [
        uid
        for uid, page in pg_dict.items()
        if not extract_examples_from_doc(page)
    ]

